import requests
from bs4 import BeautifulSoup as bs
import lxml
import json


def getPageText(url, timeout=10, encoding='gbk'):
    """Download ``url`` and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a
            stalled server would block the script indefinitely.
        encoding: Character encoding used to decode the response body.
            Defaults to ``'gbk'``, the value this script has always forced.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    # Send a desktop-browser User-Agent instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
    r = requests.get(url, headers=headers, timeout=timeout)
    # Override whatever encoding requests guessed before decoding the body.
    r.encoding = encoding
    return r.text


# Fetch the Sohu NBA player ranking page once, at module load time; the query
# string asks for the list ordered by points.
page = getPageText(
    url='http://data.sports.sohu.com/nba/nba_players_rank.php'
        '?order_by=points&spm=smpc.fb-nba-home.top-dc.2.1620824904040nf6byr7')

# Module-level list; nothing in the visible code reads or fills it.
headList = []


def getHead(soup):
    """Return the text of every ``<li>`` inside the first ``<div class="blue">``.

    The caller uses these strings as the keys for each table row.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find`` API.

    Returns:
        A list with one string per ``<li>``, in document order. An empty list
        is returned when the page has no ``<div class="blue">`` at all
        (``find`` yields ``None`` in that case), instead of failing with an
        ``AttributeError`` on ``None``.
    """
    head = soup.find(name='div', attrs={"class": "blue"})
    if head is None:
        # Layout changed or an error page was served: no header to read.
        return []
    return [li.text for li in head.find_all("li")]


def getData(soup):
    """Collect the cell text of every table row in the document.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find_all`` API.

    Returns:
        A list with one entry per ``<tr>``, each entry being the list of
        ``.text`` values of that row's ``<td>`` cells. A row without any
        ``<td>`` contributes an empty list.
    """
    return [
        [cell.text for cell in row.find_all('td')]
        for row in soup.find_all("tr")
    ]


def printFile(soup):
    """Turn the scraped table into a list of dicts, print it and save it.

    Each row returned by ``getData`` is paired positionally with the labels
    returned by ``getHead`` to form one dict per row. The resulting list is
    printed and its ``str()`` form is written to ``球员.txt`` (UTF-8) in the
    current working directory, replacing any previous content.

    Args:
        soup: Parsed document passed through to ``getHead`` and ``getData``.

    Returns:
        None.
    """
    # Scrape before touching the file, so a parsing failure neither truncates
    # an existing output file nor leaves a handle open.
    head = getHead(soup)
    data = getData(soup)

    # zip() stops at the shorter of the two sequences: a row with more cells
    # than there are labels no longer raises IndexError (the surplus cells are
    # dropped), and a row with no cells still yields an empty dict.
    peoples = [dict(zip(head, line)) for line in data]

    print(peoples)
    # The context manager closes the file even if the write fails.
    with open('球员.txt', 'wt', encoding='utf-8') as fo:
        fo.write(str(peoples))


# Parse the downloaded markup with the lxml parser, then print and save the
# extracted table.
soup = bs(markup=page, features='lxml')
printFile(soup=soup)

# Completion message shown to the user once the output file has been written.
print('爬取完毕')
